# -*- coding: utf-8 -*-
import copy
import random
from scrapy import Request
from scrapy.utils.project import get_project_settings

from zc_core.client.mongo_client import Mongo
from zc_core.model.items import Box
from zc_core.pipelines.helper.catalog_helper import CatalogHelper
from zc_core.spiders.base import BaseSpider
from zc_core.dao.batch_dao import BatchDao
from ronghw.rules import *


class FullSpider(BaseSpider):
    """Full crawl of the mall.ronghw.cn product search listing, per supplier.

    For every document in the ``supplier_pool`` Mongo collection the spider
    requests page 1 of that supplier's search listing, emits the parsed rows
    as a ``Box('item', ...)`` and, from the page-1 response only, schedules
    the remaining pages.
    """

    name = 'full'

    # Search listing endpoint; placeholders are (page number, supplier id).
    list_url = 'https://mall.ronghw.cn/search/product?page={}&rows=20&keywords=&suppliers%5B%5D={}&orderby=sales&isAsc=false'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and register the batch.

        :param batchNo: batch identifier forwarded to ``BaseSpider``; the
            base class is expected to expose it as ``self.batch_no``
            (NOTE(review): confirm against ``BaseSpider``).
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record for this run.
        BatchDao().create_batch(self.batch_no)
        self.catalog_helper = CatalogHelper()

    def _build_list_request(self, callback, sp_id, page=1):
        """Build the request for one page of a supplier's search listing.

        :param callback: response handler for the listing page.
        :param sp_id: supplier id substituted into ``list_url``.
        :param page: 1-based page number (defaults to the first page).
        :return: a ``scrapy.Request`` carrying ``reqType``, ``batchNo``,
            ``page`` and ``sp_id`` in its ``meta``.
        """
        meta = {
            'reqType': 'item',
            'batchNo': self.batch_no,
            'page': page,
            'sp_id': sp_id,
        }
        headers = {
            'Referer': 'https://mall.ronghw.cn/html/search.html?keywords_|z_0',
        }
        return Request(
            url=self.list_url.format(page, sp_id),
            meta=meta,
            headers=headers,
            callback=callback,
            # error_back is not defined in this class; presumably inherited
            # from BaseSpider.
            errback=self.error_back,
            priority=20,
        )

    def start_requests(self):
        """Yield the first listing page for every supplier in ``supplier_pool``."""
        pool_list = Mongo().list('supplier_pool', fields={'_id': 1})
        self.logger.info('全量：%s', len(pool_list))
        # Visit suppliers in random order.
        random.shuffle(pool_list)
        for sp in pool_list:
            # Crawl the first page; later pages are scheduled by the callback.
            yield self._build_list_request(self.parse_sku_list, sp.get('_id'))

    def parse_sku_list(self, response):
        """Handle one listing page: emit its rows and fan out pagination.

        Yields a ``Box('item', batch_no, rows)`` when the page has rows.
        Follow-up requests for pages ``2..total_page`` are yielded only while
        handling page 1, so later pages do not re-schedule them. A page with
        no rows is logged as an error and yields nothing.
        """
        meta = response.meta
        cur_page = meta.get('page')
        sp_id = meta.get('sp_id')

        # The bare name resolves to the module-level ``parse_sku_list``
        # (presumably from the ``ronghw.rules`` star import), not to this
        # method: class attributes are not in scope inside method bodies.
        data_list, total_page = parse_sku_list(response, self.catalog_helper)
        if not data_list:
            self.logger.error('空页: sp=%s, page=%s', sp_id, cur_page)
            return

        self.logger.info('清单1: sp=%s, page=%s, cnt=%s', sp_id, cur_page, len(data_list))
        yield Box('item', self.batch_no, data_list)

        # Pagination: only the first page schedules the remaining pages.
        if total_page and cur_page == 1:
            for page in range(2, total_page + 1):
                yield self._build_list_request(self.parse_sku_list, sp_id, page=page)

